package com.bruce.tool.address.spider.handler;

import com.bruce.tool.address.mysql.constant.DirectRegion;
import com.bruce.tool.address.mysql.constant.Level;
import com.bruce.tool.address.mysql.domain.Region;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import lombok.AccessLevel;
import lombok.NoArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.util.CollectionUtils;

import java.util.List;

/**
 * Parses the HTML listing pages of an administrative-division site into {@link Region}
 * records. There is one static entry point per level: year index, province, city, county,
 * town and street.
 *
 * <p>Static utility class; it cannot be instantiated. Every method re-parses the HTML it is
 * given and keeps no state between calls.
 *
 * @author : Bruce(刘正航) 3:07 PM 2018/11/19
 */
@Slf4j
@NoArgsConstructor(access = AccessLevel.PRIVATE)
public class RegionParser {

    /** Regex matching the trailing {@code /<digits>.html} file segment of a page URL. */
    private static final String HTML_SUFFIX = "/[0-9]*\\.html";
    private static final String YEAR_CLASS = "center_list_contlist";
    private static final String PROVINCE_CLASS = "provincetr";
    private static final String CITY_CLASS = "citytr";
    private static final String COUNTY_CLASS = "countytr";
    private static final String TOWN_CLASS = "towntr";
    private static final String STREET_CLASS = "villagetr";
    private static final String NULL_STRING = "";
    /** Parent code assigned to every province (there is no level above it). */
    private static final String ROOT_CODE = "000000000000";
    /** Province codes are right-padded with zeros to this length. */
    private static final int CODE_LENGTH = 12;
    /** Value of {@code Region.isDirect} that marks a directly-administered region. */
    private static final int DIRECT_FLAG = 1;

    /**
     * Finds the link to a year's data on the year index page.
     *
     * @param content HTML of the year index page
     * @param year    text the wanted link's href must contain; when blank, the first link is
     *                returned (the original author's note describes it as the latest year)
     * @return the matching href exactly as written in the page, or an empty string when the
     *         list container, its links, or a matching link cannot be found
     */
    public static String parseYear(String content, String year) {
        Elements containers = getElements(content, YEAR_CLASS);
        if (CollectionUtils.isEmpty(containers)) {
            return NULL_STRING;
        }
        Elements links = containers.get(0).getElementsByTag("a");
        if (CollectionUtils.isEmpty(links)) {
            return NULL_STRING;
        }
        if (StringUtils.isBlank(year)) {
            return links.get(0).attr("href");
        }
        for (Element link : links) {
            String href = link.attr("href");
            if (href.contains(year)) {
                return href;
            }
        }
        return NULL_STRING;
    }

    /**
     * Parses the province index page. Every link inside a province row becomes one region.
     *
     * @param content HTML of the province index page
     * @param currUrl URL of that page; stored as referer and, with {@code index.html}
     *                removed, used as the prefix of each province href
     * @return the provinces found, or an empty list when the page has no province rows
     */
    public static List<Region> parseProvinces(String content, String currUrl) {
        List<Region> provinces = Lists.newArrayList();
        Elements rows = getElements(content, PROVINCE_CLASS);
        if (CollectionUtils.isEmpty(rows)) {
            return provinces;
        }
        String baseUrl = currUrl.replace("index.html", "");
        for (Element row : rows) {
            for (Element link : row.getElementsByTag("a")) {
                String href = link.attr("href");
                String name = link.text();
                Region region = new Region();
                region.setPcode(ROOT_CODE);
                region.setReferer(currUrl);
                region.setHref(baseUrl + href);
                region.setName(name);
                // The province code is the link's file name without ".html", zero-padded.
                String shortCode = href.replaceAll("\\.html", "");
                region.setCode(Strings.padEnd(shortCode, CODE_LENGTH, '0'));
                region.setLevel(Level.PROVINCE.getCode());
                if (DirectRegion.isDirect(name)) {
                    region.setIsDirect(DIRECT_FLAG);
                }
                provinces.add(region);
            }
        }
        return provinces;
    }

    /**
     * Parses a province page into its cities.
     *
     * <p>When the parent is flagged as direct, each city takes the parent's name instead of
     * the name shown in the row and is flagged as direct too. Rows with fewer than two cells
     * are skipped; the href is only set when the row actually carries a link.
     *
     * @param content HTML of the province page
     * @param currUrl URL of that page; stored as referer and, with its trailing
     *                {@code /<digits>.html} replaced by {@code /}, used as href prefix
     * @param phref   the parent province
     * @return the cities found, or an empty list when the page has no city rows
     */
    public static List<Region> parseCities(String content, String currUrl, Region phref) {
        List<Region> cities = Lists.newArrayList();
        Elements rows = getElements(content, CITY_CLASS);
        if (CollectionUtils.isEmpty(rows)) {
            return cities;
        }
        String baseUrl = currUrl.replaceAll(HTML_SUFFIX, "/");
        boolean direct = isDirect(phref);
        for (RowData row : readRows(rows)) {
            Region region = new Region();
            region.setPcode(phref.getCode());
            region.setCode(row.code);
            if (StringUtils.isNotBlank(row.href)) {
                region.setHref(baseUrl + row.href);
            }
            region.setReferer(currUrl);
            // For a directly-administered parent the parent's name replaces the row's name.
            region.setName(direct ? phref.getName() : row.name);
            if (direct) {
                region.setIsDirect(DIRECT_FLAG);
            }
            region.setLevel(Level.CITY.getCode());
            cities.add(region);
        }
        return cities;
    }

    /**
     * Parses a city page into its counties.
     *
     * @param content HTML of the city page
     * @param referer URL of that page; stored as referer on every result
     * @param parent  the parent city
     * @return the counties found, or an empty list when the page has no county rows
     */
    public static List<Region> parseCounties(String content, String referer, Region parent) {
        // NOTE(review): unlike the town/street variants this only swaps the ".html" extension
        // for "/", so the page's own file name stays in the prefix as a directory segment.
        // Confirm this matches the link layout of the pages passed in.
        String baseUrl = referer.replaceAll("\\.html", "/");
        return parseRegions(content, referer, baseUrl, parent, COUNTY_CLASS, Level.COUNTY.getCode());
    }

    /**
     * Parses a county page into its towns.
     *
     * @param content HTML of the county page
     * @param referer URL of that page; its trailing {@code /<digits>.html} is replaced by
     *                {@code /} to form the href prefix
     * @param parent  the parent county
     * @return the towns found, or an empty list when the page has no town rows
     */
    public static List<Region> parseTowns(String content, String referer, Region parent) {
        String baseUrl = referer.replaceAll(HTML_SUFFIX, "/");
        return parseRegions(content, referer, baseUrl, parent, TOWN_CLASS, Level.TOWN.getCode());
    }

    /**
     * Parses a town page into its streets.
     *
     * @param content HTML of the town page
     * @param referer URL of that page; its trailing {@code /<digits>.html} is replaced by
     *                {@code /} to form the href prefix
     * @param parent  the parent town
     * @return the streets found, or an empty list when the page has no street rows
     */
    public static List<Region> parseStreets(String content, String referer, Region parent) {
        String baseUrl = referer.replaceAll(HTML_SUFFIX, "/");
        return parseRegions(content, referer, baseUrl, parent, STREET_CLASS, Level.STREET.getCode());
    }

    /**
     * Shared parser for county, town and street pages.
     *
     * @param content   HTML of the page
     * @param referer   URL of the page, stored on every result
     * @param baseUrl   prefix prepended to a row's relative href
     * @param parent    region the rows belong to; supplies the parent code and direct flag
     * @param className CSS class that marks the rows of the wanted level
     * @param level     level code stored on every result
     * @return one region per well-formed row, or an empty list when no row matches
     */
    private static List<Region> parseRegions(String content, String referer, String baseUrl, Region parent, String className, Integer level) {
        List<Region> regions = Lists.newArrayList();
        Elements rows = getElements(content, className);
        if (CollectionUtils.isEmpty(rows)) {
            return regions;
        }
        boolean direct = isDirect(parent);
        for (RowData row : readRows(rows)) {
            Region region = new Region();
            region.setPcode(parent.getCode());
            region.setName(row.name);
            region.setCode(row.code);
            // Rows without a link have no child page, so no href is recorded for them.
            if (StringUtils.isNotBlank(row.href)) {
                region.setHref(baseUrl + row.href);
            }
            if (direct) {
                region.setIsDirect(DIRECT_FLAG);
            }
            region.setReferer(referer);
            region.setLevel(level);
            regions.add(region);
        }
        return regions;
    }

    /**
     * Extracts href, code and name from each table row.
     *
     * <p>The code comes from the first cell and the name from the LAST cell, so a row with
     * extra columns between them still yields the right name (street-level rows are expected
     * to carry a classification column in the middle; verify against live pages). Rows with
     * fewer than two cells cannot provide both values and are skipped rather than aborting
     * the whole page.
     *
     * @param rows the row elements of one level
     * @return the values of every well-formed row, in page order
     */
    private static List<RowData> readRows(Elements rows) {
        List<RowData> parsed = Lists.newArrayList();
        for (Element row : rows) {
            Elements cells = row.getElementsByTag("td");
            if (cells.size() < 2) {
                continue;
            }
            Element codeCell = cells.first();
            Elements codeLinks = codeCell.getElementsByTag("a");
            String href = codeLinks.isEmpty() ? NULL_STRING : codeLinks.attr("href");
            parsed.add(new RowData(href, cellText(codeCell), cellText(cells.last())));
        }
        return parsed;
    }

    /** Returns the text of the links inside a cell, or the cell's own text when it has none. */
    private static String cellText(Element cell) {
        Elements links = cell.getElementsByTag("a");
        return links.isEmpty() ? cell.text() : links.text();
    }

    /**
     * Null-safe test of a region's direct flag.
     * Assumes {@code getIsDirect()} returns {@code Integer} or {@code int}.
     */
    private static boolean isDirect(Region region) {
        Integer flag = region.getIsDirect();
        return flag != null && flag == DIRECT_FLAG;
    }

    /**
     * Parses the HTML and returns every element carrying the given CSS class.
     *
     * @param content   HTML text; {@code null} is treated as a page with no matches
     * @param className CSS class to look for
     * @return the matching elements, possibly empty, never {@code null}
     */
    private static Elements getElements(String content, String className) {
        if (content == null) {
            return new Elements();
        }
        Document document = Jsoup.parse(content);
        return document.getElementsByClass(className);
    }

    /** Raw values read from one table row, before they are turned into a {@link Region}. */
    private static final class RowData {
        private final String href;
        private final String code;
        private final String name;

        private RowData(String href, String code, String name) {
            this.href = href;
            this.code = code;
            this.name = name;
        }
    }
}
